setwd("/Users/Allan/Dropbox/!!Papers/Methods/Science Deserves Better/Rep_Files/")

#This is the original data file.
d <- read.delim("Primary_Files/JournalArticles_ReplicationFiles_Peter_tab.txt")
d <- d[!is.na(d$Year.Published),]

names(d)
names(d)[5:8] <- c("StateAv","RepRef","RepAv","UpReq")

set.seed(1234)
d <- d[d$StateAv!="",]

###Produce RepFilesAv variable, which should be 1 if coded as 1, or if RepAv=Yes###
#Also checking that second round of coding went correctly. 
table(d$StateAv, d$RepFilesAv)
d[d$StateAv=="No" & d$RepFilesAv==".",]

table(d$StateAv, d$RepAv)

d$RepFilesAv[d$StateAv=="Yes" & d$RepAv=="Yes"] <- 1
d$RepFilesAv[d$StateAv=="Yes" & d$RepAv=="No"] <- 0
d$RepFilesAv <- as.numeric(as.character(d$RepFilesAv))

table(d$StateAv, d$RepFilesAv)
table(d$RepAv, d$RepFilesAv)
#RepFilesAv corrected 30/190 from RepAv by checking for replication files for those articles that didn't state availability

table(d$Journal[d$RepAv=="No" & d$RepFilesAv==1])
#About equally split between journals. 
#########

write.csv(d, "Primary_Files/JournalArticles_ReplicationFiles.csv")


#Proportion who state that replication files are available:
#We found that 48\% of publications employing statistical analysis stated that replication files were available at a website, though we were only able to find replication files for 68\% of these; we were also able to find replication files for 18\% of publications that did not state the availability of replication files.
sum(d$StateAv=="Yes" & d$UpReq=="No")/length(d$StateAv)
#.48

sum(d$RepFilesAv[d$StateAv=="Yes"]==1)/sum(d$StateAv=="Yes")
#.68

sum(d$RepFilesAv[d$StateAv=="No"]==1)/sum(d$StateAv=="No")
#.18

#"4% of publications state that replication files are available upon request."
sum(d$UpReq=="Yes")/length(d$StateAv)
#0.04

#Proportion State Available by Journal
sum(d$StateAv=="Yes" & d$UpReq=="No" & d$Journal=="AJPS")/sum(d$Journal=="AJPS")
#.61
sum(d$StateAv=="Yes" & d$UpReq=="No" & d$Journal=="APSR")/sum(d$Journal=="APSR")
#.16

#Proportion Is Available by Journal
sum(d$RepFilesAv==1  & d$Journal=="AJPS")/sum(d$Journal=="AJPS")
#.49

sum(d$RepFilesAv==1  & d$Journal=="AJPS" & d$Year>2010)/sum(d$Journal=="AJPS" & d$Year>2010)
#.65
sum(d$RepFilesAv==0  & d$Journal=="AJPS" & d$Year>2010)/sum(d$Journal=="AJPS" & d$Year>2010)

sum(d$RepFilesAv==1  & d$Journal=="APSR")/sum(d$Journal=="APSR")
#.31



###Producing Figure 1###
library("ggplot2")
library(colorspace)


d$y <- rep(NA, length(d[,1]))
d$y[d$StateAv=="Yes"] <- 1 
d$y[d$StateAv=="No"] <- 0

d$y2 <- rep(NA, length(d[,1]))
d$y2[d$RepFilesAv==1] <- 1 
d$y2[d$RepFilesAv==0] <- 0

d$x <- d$Year.Published
d$xj <- d$x + runif(length(d$x),-0.3,0.3)
d$yj <- d$y + runif(length(d$y),-0.03,0.03)
#d$yj <- 0.96*d$y + runif(length(d$x),-0.03,0.03)
d$y2j <- d$y2 + runif(length(d$y2),-0.03,0.03)

height <- 6


ggplot(d, aes(y=y, x=as.numeric(Year.Published))) + facet_grid(. ~ Journal) + theme_bw() +
  ylab("Publication States that Replication Files Are Available") +  
  xlab("Year") +
  stat_smooth(method="glm", family="binomial", formula= y ~ poly(x,3), fill = "blue", size=1.2, alpha=0.05)  + 
  geom_point(aes(y=yj, x=xj), alpha=0.5) +
    theme(axis.ticks=element_blank(), panel.grid.minor.y=element_blank()) 
#   scale_y_continuous(breaks=c(0, 1), labels=c("No", "Yes"))
ggsave("Output/Fig1A.pdf", width=1.6*height, height=height)

p2 <- ggplot(d, aes(y=y2, x=as.numeric(Year.Published))) + facet_grid(. ~ Journal) + theme_bw() +
  ylab("Replication Files Are Publicly Available") +  
  xlab("Year") 
#+ ylim(0,1)
p2 +  stat_smooth(method="glm", family="binomial", formula= y ~ poly(x,3), fill = "blue", size=1.2, alpha=0.05)  + theme(axis.ticks=element_blank(), panel.grid.minor.y=element_blank())  +
  geom_point(aes(y=y2j, x=xj), alpha=0.5)
ggsave("Output/Fig1B.pdf", width=1.6*height, height=height)


